import pandas as pd
# to communicate with Google BigQuery
from pandas.io import gbq
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf
project_id = 'poised-list-163221'
This procedure creates a grouped bar chart showing prices/cost for different drugs and procedures, grouped by region
It also adds a line graph showing the volume of prescribed drugs and completed procedures for the same regions. The volume is represented on the right y-axis.
def createbarfig(df_max, x_max, y_max, df_min, x_min, y_min, claims, region, x_axis, y_axis, title):
cf.set_config_file(offline = True, world_readable = True, theme = 'pearl')
trace1 = go.Bar(
x = df_max[x_max],
y = df_max[y_max],
name = 'max',
text = df_max[region],
xaxis = 'x',
yaxis = 'y',
opacity = 0.6
)
trace2 = go.Bar(
x = df_min[x_min],
y = df_min[y_min],
name = 'min',
text = df_min[region],
xaxis = 'x',
yaxis = 'y',
opacity = 0.6
)
trace3 = go.Scatter(
x = df_min[x_min],
y = df_min[claims],
name = 'min_claims',
text = df_min[region],
xaxis = 'x',
yaxis = 'y2',
marker = dict(color = 'rgb(255,178,102)' )
)
trace4 = go.Scatter (
x = df_max[x_max],
y = df_max[claims],
name = 'max_claims',
text = df_max[region],
xaxis = 'x',
yaxis = 'y2',
marker = dict(color = 'rgb(102,178,255)')
)
data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
barmode = 'group',
margin = dict(b = 250),
yaxis = dict(title = y_axis, side = 'left', anchor = 'x', overlaying = 'y2'),
yaxis2 = dict(title = 'number_claims', side = 'right', anchor = 'x'),
xaxis = dict(title = x_axis),
title = title,
autosize = False,
height = 750,
width = 750,
legend = dict(x =-.1, y = 1.2))
fig = go.Figure(data = data, layout=layout)
return fig
This query selects the region, and total claim numbers for each drug where the unitcost is at a minimum among the top 10 prescribed list from the 2014 Medicare claims public dataset.
minquery = """
select * from
(
select main_table.drug, main_table.region, main_table.unitcost, main_table.total_claims from
(select drug, round(min(unitcost),4) as min_cost
from
(
select region, drug, unitcost
from
(
select concat(city,' ',state) as region, drug, unitcost
from
(SELECT nppes_provider_city AS city, nppes_provider_state as state, drug_name as drug,
(sum(total_drug_cost)/sum(total_day_supply)) AS unitcost
FROM [bigquery-public-data:medicare.part_d_prescriber_2014]
GROUP BY city, state, drug
) as sq1) as sq2
where drug in
(select drug_name from (select drug_name, sum(total_claim_count) as cost
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by drug_name
order by cost desc
limit 10) as sq3)
and region in
(select concat(city,' ',state) as region from (select nppes_provider_city as city,
nppes_provider_state as state, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by city, state
order by total_claims desc
limit 10) as sq4)
order by drug, unitcost
)as sq5
group by drug)as min_table
join
(select region, drug, round(unitcost,4) as unitcost, total_claims
from
(
select concat(city,' ',state) as region, drug, unitcost, total_claims
from
(SELECT nppes_provider_city AS city, nppes_provider_state as state, drug_name as drug, sum(total_claim_count) as total_claims,
(sum(total_drug_cost)/sum(total_day_supply)) AS unitcost
FROM [bigquery-public-data:medicare.part_d_prescriber_2014]
GROUP BY city, state, drug
) as sq1) as sq2
where drug in
(select drug_name from (select drug_name, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by drug_name
order by total_claims desc
limit 10) as sq3)
and region in
(select concat(city,' ',state) as region from (select nppes_provider_city as city,
nppes_provider_state as state, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by city, state
order by total_claims desc
limit 10)))as main_table
on main_table.unitcost = min_table.min_cost and main_table.drug = min_table.drug
) as sqx order by main_table.drug
"""
drug_price_min_df = gbq.read_gbq(minquery, project_id = project_id)
This query selects the region, and total claim numbers for each drug where the unitcost is at a maximum among the top 10 prescribed list from the 2014 Medicare claims public dataset.
maxquery = """
select * from
(
select main_table.drug, main_table.region, main_table.unitcost, main_table.total_claims from
(select drug, round(max(unitcost),4) as max_cost
from
(
select region, drug, unitcost
from
(
select concat(city,' ',state) as region, drug, unitcost
from
(SELECT nppes_provider_city AS city, nppes_provider_state as state, drug_name as drug,
(sum(total_drug_cost)/sum(total_day_supply)) AS unitcost
FROM [bigquery-public-data:medicare.part_d_prescriber_2014]
GROUP BY city, state, drug
) as sq1) as sq2
where drug in
(select drug_name from (select drug_name, sum(total_claim_count) as cost
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by drug_name
order by cost desc
limit 10) as sq3)
and region in
(select concat(city,' ',state) as region from (select nppes_provider_city as city,
nppes_provider_state as state, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by city, state
order by total_claims desc
limit 10) as sq4)
order by drug, unitcost
)as sq5
group by drug)as max_table
join
(select region, drug, round(unitcost,4) as unitcost, total_claims
from
(
select concat(city,' ',state) as region, drug, unitcost, total_claims
from
(SELECT nppes_provider_city AS city, nppes_provider_state as state, drug_name as drug, sum(total_claim_count) as total_claims,
(sum(total_drug_cost)/sum(total_day_supply)) AS unitcost
FROM [bigquery-public-data:medicare.part_d_prescriber_2014]
GROUP BY city, state, drug
) as sq1) as sq2
where drug in
(select drug_name from (select drug_name, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by drug_name
order by total_claims desc
limit 10) as sq3)
and region in
(select concat(city,' ',state) as region from (select nppes_provider_city as city,
nppes_provider_state as state, sum(total_claim_count) as total_claims
from [bigquery-public-data:medicare.part_d_prescriber_2014]
group by city, state
order by total_claims desc
limit 10)))as main_table
on main_table.unitcost = max_table.max_cost and main_table.drug = max_table.drug
) as sqx order by main_table.drug
"""
drug_price_max_df = gbq.read_gbq(maxquery, project_id = project_id)
drug_dif_fig = createbarfig(drug_price_max_df, 'main_table_drug', 'main_table_unitcost',\
drug_price_min_df, 'main_table_drug', 'main_table_unitcost',\
'main_table_total_claims', \
'main_table_region', 'drug name', 'cost/unit in $',\
'Price difference b/w frequently prescribed Medicare Rx')
cf.iplot(drug_dif_fig)
maxquery_inpatient = """
select maint.dx, maint.region, maint.total_cost, maint.discharge_num
from
(
select dx, max(total_cost) as max_cost
from
(
select dx, region, round(total_cost,0) as total_cost
from
(
select dx, concat(city,' ',state) as region, total_cost
from
(
select drg_definition as dx, provider_city as city, provider_state as state,
sum(average_medicare_payments) as total_cost
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx, city, state
) as sq) as sq2
where dx in (select dx from
(
select drg_definition as dx, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx
order by discharge_num desc
limit 10
)as sq3)
and region in
(select concat(city,' ',state) as region from
(select provider_city as city, provider_state as state, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by city, state
order by discharge_num desc
limit 10
)as sq4)
order by dx, total_cost desc
) as sq5
group by dx
) as maxt
JOIN
(select dx, region, round(total_cost,0) as total_cost, discharge_num
from
(
select dx, concat(city,' ',state) as region, total_cost, discharge_num
from
(
select drg_definition as dx, provider_city as city, provider_state as state, sum(total_discharges) as discharge_num,
sum(average_medicare_payments) as total_cost
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx, city, state
) as sq) as sq2
where dx in (select dx from
(
select drg_definition as dx, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx
order by discharge_num desc
limit 10
)as sq3)
and region in
(select concat(city,' ',state) as region from
(select provider_city as city, provider_state as state, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by city, state
order by discharge_num desc
limit 10
)as sq4)
order by dx, total_cost desc) as maint
on maint.total_cost = maxt.max_cost and maint.dx = maxt.dx
order by maint.dx
"""
inpatient_max_df = gbq.read_gbq(maxquery_inpatient, project_id = project_id)
minquery_inpatient = """
select maint.dx, maint.region, maint.total_cost, maint.discharge_num
from
(
select dx, min(total_cost) as min_cost
from
(
select dx, region, round(total_cost,0) as total_cost
from
(
select dx, concat(city,' ',state) as region, total_cost
from
(
select drg_definition as dx, provider_city as city, provider_state as state,
sum(average_medicare_payments) as total_cost
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx, city, state
) as sq) as sq2
where dx in (select dx from
(
select drg_definition as dx, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx
order by discharge_num desc
limit 10
)as sq3)
and region in
(select concat(city,' ',state) as region from
(select provider_city as city, provider_state as state, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by city, state
order by discharge_num desc
limit 10
)as sq4)
order by dx, total_cost desc
) as sq5
group by dx
) as mint
JOIN
(select dx, region, round(total_cost,0) as total_cost, discharge_num
from
(
select dx, concat(city,' ',state) as region, total_cost, discharge_num
from
(
select drg_definition as dx, provider_city as city, provider_state as state, sum(total_discharges) as discharge_num,
sum(average_medicare_payments) as total_cost
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx, city, state
) as sq) as sq2
where dx in (select dx from
(
select drg_definition as dx, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by dx
order by discharge_num desc
limit 10
)as sq3)
and region in
(select concat(city,' ',state) as region from
(select provider_city as city, provider_state as state, sum(total_discharges) as discharge_num
from [bigquery-public-data:medicare.inpatient_charges_2014]
group by city, state
order by discharge_num desc
limit 10
)as sq4)
order by dx, total_cost desc) as maint
on maint.total_cost = mint.min_cost and maint.dx = mint.dx
order by maint.dx
"""
inpatient_min_df = gbq.read_gbq(minquery_inpatient, project_id = project_id)
inpatient_dif_fig = createbarfig(inpatient_max_df, 'maint_dx', 'maint_total_cost',\
inpatient_min_df, 'maint_dx', 'maint_total_cost', 'maint_discharge_num', \
'maint_region', 'procedure name', 'total cost $',\
'Price differential among inpatient Medicare services')
cf.iplot(inpatient_dif_fig)
min_outpatient_query = """
select sq5.apc, sq5.total_payments, sq5.hospital_referral_region, sq5.num_services from
(select apc, min(total_payments) as min_payments
from
(
select apc, hospital_referral_region, sum(average_total_payments) as total_payments, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
where hospital_referral_region in
(select hospital_referral_region from
(
select hospital_referral_region, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by hospital_referral_region
order by num_services desc
limit 10) as sq1
)
and apc in
(select apc from
(
select apc, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by apc
order by num_services desc
limit 10) as sq2
)
group by apc, hospital_referral_region
order by apc, total_payments desc
) group by apc)as sq4
join
(select apc, hospital_referral_region, sum(average_total_payments) as total_payments, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
where hospital_referral_region in
(select hospital_referral_region from
(
select hospital_referral_region, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by hospital_referral_region
order by num_services desc
limit 10) as sq1
)
and apc in
(select apc from
(
select apc, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by apc
order by num_services desc
limit 10) as sq2
)
group by apc, hospital_referral_region
order by apc, total_payments desc
)as sq5
on sq4.apc = sq5.apc and sq4.min_payments = sq5.total_payments
order by sq5.apc
"""
outpatient_min_df = gbq.read_gbq(min_outpatient_query, project_id = project_id)
outpatient_min_df = outpatient_min_df.drop(outpatient_min_df.index[8])
max_outpatient_query = """
select sq5.apc, sq5.total_payments, sq5.hospital_referral_region, sq5.num_services from
(select apc, hospital_referral_region, sum(average_total_payments) as total_payments, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
where hospital_referral_region in
(select hospital_referral_region from
(
select hospital_referral_region, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by hospital_referral_region
order by num_services desc
limit 10) as sq1
)
and apc in
(select apc from
(
select apc, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by apc
order by num_services desc
limit 10) as sq2
)
group by apc, hospital_referral_region
order by apc, total_payments desc
)as sq5
join
(select apc, max(total_payments) as max_payments
from
(
select apc, hospital_referral_region, sum(average_total_payments) as total_payments, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
where hospital_referral_region in
(select hospital_referral_region from
(
select hospital_referral_region, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by hospital_referral_region
order by num_services desc
limit 10) as sq1
)
and apc in
(select apc from
(
select apc, sum(outpatient_services) as num_services
from [bigquery-public-data:medicare.outpatient_charges_2014]
group by apc
order by num_services desc
limit 10) as sq2
)
group by apc, hospital_referral_region
order by apc, total_payments desc
) group by apc)as sq6
on sq5.apc = sq6.apc and sq5.total_payments = sq6.max_payments
order by sq5.apc
"""
outpatient_max_df = gbq.read_gbq(max_outpatient_query, project_id = project_id)
outpatient_max_df = outpatient_max_df.drop(outpatient_max_df.index[8])
outpatient_dif_fig = createbarfig(outpatient_max_df, 'sq5_apc', 'sq5_total_payments',\
outpatient_min_df, 'sq5_apc', 'sq5_total_payments', 'sq5_num_services', \
'sq5_hospital_referral_region', 'procedure name', 'total cost $',\
'Price differential among outpatient Medicare services')
cf.iplot(outpatient_dif_fig)